import datatable as dt
import time

# Build a large synthetic dataset (10 million rows) and time its construction.
# The row count is defined once so the three columns cannot drift out of sync.
N_ROWS = 10_000_000

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
large_df = dt.Frame(
    id=range(N_ROWS),
    value=[x % 100 for x in range(N_ROWS)],  # cycles through 0..99
    category=[f"cat_{x % 10}" for x in range(N_ROWS)],  # "cat_0".."cat_9"
)
print(f"创建数据集耗时: {time.perf_counter() - start_time:.2f}秒")

# Group-by aggregation benchmark: count the rows in each "category" group.
# perf_counter() is used instead of time.time() because it is monotonic and
# has the highest available resolution for interval measurement.
start_time = time.perf_counter()
result = large_df[:, dt.count(), dt.by("category")]
print(f"分组聚合耗时: {time.perf_counter() - start_time:.2f}秒")
# Printed after the timing line so displaying the frame is not part of the
# measured interval.
print(result)